package com.itextpdf.text.pdf; import com.itextpdf.testutils.CompareTool; import com.itextpdf.text.Document; import com.itextpdf.text.DocumentException; import com.itextpdf.text.error_messages.MessageLocalization; import com.itextpdf.text.pdf.parser.TaggedPdfReaderTool; import java.io.*; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.junit.*; import org.xml.sax.SAXException; public class TaggedPdfCopyTest { Document document; PdfCopy copy; String output; public static final String NO_PARENT_TREE = "The document does not contain ParentTree"; public static final String NO_CLASS_MAP = "The document does not contain ClassMap"; public static final String NO_ROLE_MAP = "The document does not contain RoleMap"; public static final String NO_STRUCT_TREE_ROOT = "No StructTreeRoot found"; public static final String NO_ID_TREE = "The document does not contain ID Tree"; public static final String EMPTY_ID_TREE = "The document's ID Tree is empty"; public static final String SOURCE4 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source4.pdf"; public static final String SOURCE10 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source10.pdf"; public static final String SOURCE11 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source11.pdf"; public static final String SOURCE12 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source12.pdf"; public static final String SOURCE16 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source16.pdf"; public static final String SOURCE17 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source17.pdf"; public static final String SOURCE18 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source18.pdf"; public static final String SOURCE19 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source19.pdf"; public static final String SOURCE22 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source22.pdf"; public static final String SOURCE24 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source24.pdf"; public static final String SOURCE25 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source25.pdf"; public static final String SOURCE25_1 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source25_1.pdf"; public static final String SOURCE32 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source32.pdf"; public static final String SOURCE42 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source42.pdf"; public static final String SOURCE51 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source51.pdf"; public static final String SOURCE52 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source52.pdf"; public static final String SOURCE53 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source53.pdf"; public static final String SOURCE61 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source61.pdf"; public static final String SOURCE62 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source62.pdf"; public static final String SOURCE63 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source63.pdf"; public static final String SOURCE64 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source64.pdf"; public static final String SOURCE72 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source72.pdf"; public static final String SOURCE73 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source73.pdf"; public static final String SOURCE81 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/source81.pdf"; public static final String DEV_805 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/dev-805.pdf"; public static final String SOURCE_CF_11 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/sourceCf11.pdf"; public static final String SOURCE_CF_12 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/sourceCf12.pdf"; public static final String SOURCE_CF_13 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/sourceCf13.pdf"; public static final String SOURCE_CF_14 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/sourceCf14.pdf"; public static final String SOURCE_CF_15 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/sourceCf15.pdf"; public static final String SOURCE_CF_16 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/sourceCf16.pdf"; public static final String CMP25 = "./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/pdf/cmp_out25.pdf"; public static final String OUT = "./target/com/itextpdf/test/pdf/TaggedPdfCopyTest/out"; public static final String OUTPATH = "./target/com/itextpdf/test/pdf/TaggedPdfCopyTest/"; public static final PdfDictionary CM31 = new PdfDictionary(); public static final PdfDictionary sElem = new PdfDictionary(); //<</O/Layout/EndIndent 18.375/StartIndent 11.25/TextIndent -11.25/LineHeight 13>> //<</C/SC.7.147466/Pg 118 0 R/Type/StructElem/K 3/S/Span/Lang(en)/P 1 0 R>> static { CM31.put(PdfName.O, new PdfName("Layout")); CM31.put(new PdfName("EndIndent"), new PdfNumber(18.375)); CM31.put(new PdfName("StartIndent"), new PdfNumber(11.25)); CM31.put(new PdfName("TextIndent"), new PdfNumber(-11.25)); CM31.put(new PdfName("LineHeight"), new PdfNumber(13)); sElem.put(PdfName.C, new PdfName("SC.7.147466")); sElem.put(PdfName.K, new PdfNumber(5)); sElem.put(PdfName.S, PdfName.SPAN); sElem.put(PdfName.LANG, new PdfString("en")); } @Before public void init() throws FileNotFoundException, DocumentException { new File("./target/com/itextpdf/test/pdf/TaggedPdfCopyTest/").mkdirs(); Document.compress = false; } private void initializeDocument(String name) throws DocumentException, FileNotFoundException { this.output = OUT + name + ".pdf"; document = new Document(); copy = new PdfCopy(document, new FileOutputStream(output)); copy.setTagged(); document.open(); } @Test(expected = BadPdfFormatException.class) public void classMapConflict() throws IOException, DocumentException { initializeDocument("-cmc"); PdfReader reader1 = new PdfReader(SOURCE11); try { copy.addPage(copy.getImportedPage(reader1, 76, true)); } catch (BadPdfFormatException e) {} reader1.close(); PdfReader reader2 = new PdfReader(SOURCE12); copy.addPage(copy.getImportedPage(reader2, 76, true)); reader2.close(); } @Test(expected = BadPdfFormatException.class) public void roleMapConflict() throws IOException, DocumentException { initializeDocument("-rolemap"); PdfReader reader1 = new PdfReader(SOURCE11); //PdfDictionary trailer = reader1.trailer; try { copy.addPage(copy.getImportedPage(reader1, 76, true)); } catch (BadPdfFormatException e) {} reader1.close(); PdfReader reader2 = new PdfReader(SOURCE22); copy.addPage(copy.getImportedPage(reader2, 76, true)); reader2.close(); } @Test public void pdfMergeTest() throws IOException, DocumentException { initializeDocument("-merge"); int n = 14; PdfReader reader1 = new PdfReader(SOURCE11); copy.addPage(copy.getImportedPage(reader1, 76, true)); copy.addPage(copy.getImportedPage(reader1, 83, true)); PdfReader reader2 = new PdfReader(SOURCE32); copy.addPage(copy.getImportedPage(reader2, 69, true)); copy.addPage(copy.getImportedPage(reader2, 267, true)); document.close(); reader1.close(); reader2.close(); PdfReader reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 2, "Kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); int[] nums = new int[] {44, 0, 65, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 81}; for (int i = 0; i < n; ++i) verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1), true); PdfDictionary ClassMap = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.CLASSMAP)), NO_CLASS_MAP); PdfDictionary CM31 = verifyIsDictionary(PdfStructTreeController.getDirectObject(ClassMap.get(new PdfName("CM31"))), "ClassMap does not contain.\"CM31\""); if (!PdfStructTreeController.compareObjects(TaggedPdfCopyTest.CM31, CM31)) Assert.fail("ClassMap contains incorrect \"CM31\""); PdfDictionary RoleMap = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.ROLEMAP)), NO_ROLE_MAP); if (!PdfName.SPAN.equals(RoleMap.get(new PdfName("ParagraphSpan")))) throw new BadPdfFormatException("RoleMap does not contain \"ParagraphSpan\""); reader.close(); } @Test public void copyTaggedPdf0() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("0"); PdfReader reader = new PdfReader(SOURCE11); int n = reader.getNumberOfPages(); for (int i = 1; i <= n; ++i) copy.addPage(copy.getImportedPage(reader, i, true)); document.close(); reader.close(); Assert.assertEquals(getCommonNumsCount(SOURCE11), getCommonNumsCount(output)); reader = new PdfReader(output); PdfDictionary structTreeRoot = (PdfDictionary)reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT); PdfDictionary ClassMap = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.CLASSMAP)), NO_CLASS_MAP); if (ClassMap.size() != 109) Assert.fail("ClassMap incorrect"); String[] CMs = new String[]{"CM84", "CM81", "CM80", "CM87", "CM88", "CM9", "CM94", "CM95", "CM96", "CM97", "CM90", "CM91", "CM92", "CM93", "CM98", "CM99", "CM16", "CM17", "CM14", "CM15", "CM12", "CM13", "CM10", "CM19", "CM20", "CM21", "CM22", "CM23", "CM24", "CM25", "CM26", "CM27", "CM28", "CM29", "CM100", "CM101", "CM102", "CM103", "CM105", "CM106", "CM30", "CM31", "CM34", "CM35", "CM32", "CM33", "CM38", "CM39", "CM36", "CM118", "CM117", "CM49", "CM48", "CM116", "CM115", "CM47", "CM114", "CM46", "CM113", "CM45", "CM112", "CM44", "CM43", "CM111", "CM42", "CM110", "CM41", "CM108", "CM109", "CM127", "CM126", "CM58", "CM129", "CM128", "CM55", "CM123", "CM54", "CM125", "CM57", "CM56", "CM51", "CM50", "CM53", "CM120", "CM52", "CM119", "CM68", "CM136", "CM135", "CM67", "CM133", "CM139", "CM60", "CM132", "CM64", "CM63", "CM62", "CM61", "CM145", "CM76", "CM78", "CM1", "CM2", "CM71", "CM70", "CM73", "CM141", "CM72", "CM74"}; for (int i = 0; i < CMs.length; ++i) verifyIsDictionary(PdfStructTreeController.getDirectObject(ClassMap.get(new PdfName(CMs[i]))), "ClassMap does not contain \""+CMs[i]+"\""); PdfDictionary RoleMap = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.ROLEMAP)), NO_ROLE_MAP); if (!PdfName.SPAN.equals(RoleMap.get(new PdfName("ParagraphSpan")))) throw new BadPdfFormatException("RoleMap does not contain \"ParagraphSpan\"."); //if (reader.eofPos != 3378440L) Assert.fail("Invalid size of pdf."); reader.close(); compareResults("0"); } @Test public void copyTaggedPdf1() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("1"); PdfReader reader = new PdfReader(SOURCE32); copy.addPage(copy.getImportedPage(reader, 5, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 1, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, 22, "Nums"); verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(1)), 61, "Nums of page 1"); reader.close(); compareResults("1"); } @Test public void copyTaggedPdf2() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("2"); PdfReader reader = new PdfReader(SOURCE16); copy.addPage(copy.getImportedPage(reader, 2, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 1, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, 2, "Nums"); verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(1)), 7, "Nums of page 1"); reader.close(); compareResults("2"); } @Test public void copyTaggedPdf3() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("3"); PdfReader reader = new PdfReader(SOURCE10); int n = reader.getNumberOfPages(); for (int i = 1; i <= n; ++i) copy.addPage(copy.getImportedPage(reader, i, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 1, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); int[] nums = new int[] {16, 87, 128, 74, 74, 74, 26}; for (int i = 0; i < n; ++i) verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1)); reader.close(); compareResults("3"); } @Test public void copyTaggedPdf4() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("4"); PdfReader reader = new PdfReader(SOURCE10); int n = reader.getNumberOfPages(); for (int i = n; i > 0; --i) copy.addPage(copy.getImportedPage(reader, i, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 7, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); int[] nums = new int[] {26, 74, 74, 74, 128, 87, 16}; for (int i = 0; i < n; ++i) verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1)); reader.close(); compareResults("4"); } @Test public void copyTaggedPdf5() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("5"); PdfReader reader = new PdfReader(SOURCE10); int n = 3; copy.addPage(copy.getImportedPage(reader, 1, true)); copy.addPage(copy.getImportedPage(reader, 3, true)); copy.addPage(copy.getImportedPage(reader, 7, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 1, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); int[] nums = new int[] {16, 128, 26}; for (int i = 0; i < n; ++i) //nums[i] = ((PdfArray)PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1))).size(); verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1)); reader.close(); compareResults("5"); } @Test public void copyTaggedPdf6() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("6"); PdfReader reader = new PdfReader(SOURCE11); int n = 12; copy.addPage(copy.getImportedPage(reader, 1, true)); copy.addPage(copy.getImportedPage(reader, 25, true)); copy.addPage(copy.getImportedPage(reader, 7, true)); copy.addPage(copy.getImportedPage(reader, 48, true)); copy.addPage(copy.getImportedPage(reader, 50, true)); copy.addPage(copy.getImportedPage(reader, 2, true)); copy.addPage(copy.getImportedPage(reader, 8, true)); copy.addPage(copy.getImportedPage(reader, 90, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 6, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); int[] nums = new int[] {5, 0, 33, 12, 0, 48, 35, 182, 0, 0, 17, 37}; for (int i = 0; i < n; ++i) verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1), true); PdfDictionary ClassMap = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.CLASSMAP)), NO_CLASS_MAP); if (ClassMap.size() != 27) Assert.fail("ClassMap incorrect"); String[] CMs = new String[]{"CM118", "CM117", "CM133", "CM47", "CM46", "CM114", "CM43", "CM110", "CM21", "CM22", "CM26", "CM27", "CM145", "CM128", "CM29", "CM56", "CM1", "CM2", "CM72", "CM16", "CM34", "CM17", "CM14", "CM15", "CM119", "CM12", "CM13"}; for (int i = 0; i < CMs.length; ++i) verifyIsDictionary(PdfStructTreeController.getDirectObject(ClassMap.get(new PdfName(CMs[i]))), "ClassMap.does.not.contain.\""+CMs[i]+"\""); PdfDictionary RoleMap = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.ROLEMAP)), NO_ROLE_MAP); if (!PdfName.SPAN.equals(RoleMap.get(new PdfName("ParagraphSpan")))) throw new BadPdfFormatException("RoleMap does not contain \"ParagraphSpan\"."); //if (reader.eofPos != 249068) Assert.fail("Invalid size of pdf."); reader.close(); compareResults("6"); } @Test public void copyTaggedPdf7() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("7"); PdfReader reader = new PdfReader(SOURCE16); int n = reader.getNumberOfPages(); for (int i = 1; i <= n; ++i){ copy.addPage(copy.getImportedPage(reader, i, true)); copy.addPage(copy.getImportedPage(reader, i, true)); } for (int i = 1; i <= n; ++i) copy.addPage(copy.getImportedPage(reader, i, true)); for (int i = 1; i <= n; ++i) copy.addPage(copy.getImportedPage(reader, i, true)); n *= 4; document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 5, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); int[] nums = new int[] {48, 48, 7, 7, 48, 7, 48, 7}; for (int i = 0; i < n; ++i) verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1)); reader.close(); compareResults("7"); } @Test public void copyTaggedPdf8() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("8"); PdfReader reader = new PdfReader(SOURCE42); int n = reader.getNumberOfPages(); for (int i = 1; i <= n; ++i){ copy.addPage(copy.getImportedPage(reader, i, true)); copy.addPage(copy.getImportedPage(reader, i, true)); } for (int i = 1; i <= n; ++i) copy.addPage(copy.getImportedPage(reader, i, true)); for (int i = 1; i <= n; ++i) copy.addPage(copy.getImportedPage(reader, i, true)); n = 52; document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 6, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); // int[] nums = new int[] {42, 42, 11, 11, 13, 13, 42, 11, 13, 42, 11, 13}; // for (int i = 0; i < n; ++i) // verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1)); reader.close(); compareResults("8"); } @Test public void copyTaggedPdf9() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("9"); PdfReader reader4 = new PdfReader(SOURCE4); PdfReader reader10 = new PdfReader(SOURCE10); PdfReader reader32 = new PdfReader(SOURCE32); int n = 40; copy.addPage(copy.getImportedPage(reader4, 1, true)); copy.addPage(copy.getImportedPage(reader10, 2, true)); copy.addPage(copy.getImportedPage(reader10, 3, true)); copy.addPage(copy.getImportedPage(reader10, 7, true)); copy.addPage(copy.getImportedPage(reader32, 50, true)); copy.addPage(copy.getImportedPage(reader32, 55, true)); copy.addPage(copy.getImportedPage(reader4, 1, true)); copy.addPage(copy.getImportedPage(reader32, 50, true)); copy.addPage(copy.getImportedPage(reader32, 55, true)); copy.addPage(copy.getImportedPage(reader32, 56, true)); copy.addPage(copy.getImportedPage(reader32, 60, true)); copy.addPage(copy.getImportedPage(reader10, 3, true)); copy.addPage(copy.getImportedPage(reader10, 4, true)); copy.addPage(copy.getImportedPage(reader10, 1, true)); copy.addPage(copy.getImportedPage(reader32, 1, true)); copy.addPage(copy.getImportedPage(reader32, 15, true)); copy.addPage(copy.getImportedPage(reader32, 20, true)); copy.addPage(copy.getImportedPage(reader32, 5, true)); copy.addPage(copy.getImportedPage(reader4, 1, true)); copy.addPage(copy.getImportedPage(reader10, 7, true)); document.close(); reader4.close(); reader10.close(); reader32.close(); PdfReader reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 11, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, n*2, "Nums"); int[] nums = new int[] {7, 87, 128, 26, 135, 0, 0, 83, 7, 135, 0, 0, 0, 0, 0, 0, 83, 116, 26, 128, 74, 16, 12, 0, 0, 38, 54, 61, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 26}; for (int i = 0; i < n; ++i) // nums[i] = ((PdfArray)PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1))).size(); verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(i*2+1)), nums[i], "Nums of page "+(i+1), true); reader.close(); compareResults("9"); } @Test public void copyTaggedPdf10() throws IOException, DocumentException, ParserConfigurationException, SAXException { //source17: StructTreeRoot has no kids - incorrect syntax of tags - try to fix in result pdf initializeDocument("10"); PdfReader reader = new PdfReader(SOURCE17); copy.addPage(copy.getImportedPage(reader, 2, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 1, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, 2, "Nums"); verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(1)), 7, "Nums of page 1"); reader.close(); compareResults("10"); } @Test(expected = BadPdfFormatException.class) public void copyTaggedPdf11() throws IOException, DocumentException, ParserConfigurationException, SAXException { //source51: invalid nums - references to PdfDictionary, all pages has the same "NumDictionary" // 58 0 obj // <</Nums[0 2 0 R 1 2 0 R 2 2 0 R 3 2 0 R 4 2 0 R 5 2 0 R 6 2 0 R]>> // endobj //where 2 0 R is StructElement of Document initializeDocument("11"); PdfReader reader = new PdfReader(SOURCE51); copy.addPage(copy.getImportedPage(reader, 2, true)); reader.close(); } @Test(expected = BadPdfFormatException.class) public void copyTaggedPdf12() throws IOException, DocumentException, ParserConfigurationException, SAXException { //source52: Nums array is empty: // 58 0 obj // <</Nums[ ]>> // endobj initializeDocument("12"); PdfReader reader = new PdfReader(SOURCE52); copy.addPage(copy.getImportedPage(reader, 2, true)); reader.close(); } @Test(expected = BadPdfFormatException.class) public void copyTaggedPdf13() throws IOException, DocumentException, ParserConfigurationException, SAXException { //source53: StructTreeRoot doesn't have kids and Nums is empty initializeDocument("13"); PdfReader reader = new PdfReader(SOURCE53); copy.addPage(copy.getImportedPage(reader, 2, true)); reader.close(); } @Test public void copyTaggedPdf14() throws IOException, DocumentException { initializeDocument("14"); PdfReader reader = new PdfReader(SOURCE11); copy.addPage(copy.getImportedPage(reader, 5, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); verifyArraySize(structTreeRoot.get(PdfName.K), 1, "Invalid count of kids in StructTreeRoot"); PdfObject obj = PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE)); verifyIsDictionary(obj, NO_PARENT_TREE); PdfArray array = ((PdfDictionary)obj).getAsArray(PdfName.NUMS); verifyArraySize(array, 8, "Nums"); verifyArraySize(PdfStructTreeController.getDirectObject(array.getPdfObject(1)), 20, "Nums of page 1"); reader.close(); } @Test public void copyTaggedPdf15() throws IOException, DocumentException { initializeDocument("15"); copy.setMergeFields(); PdfReader reader1 = new PdfReader(SOURCE61); PdfReader reader2 = new PdfReader(SOURCE62); copy.addDocument(reader1); copy.addDocument(reader2); document.close(); reader1.close(); reader2.close(); PdfReader reader = new PdfReader(output); PdfDictionary catalog = reader.getCatalog(); PdfDictionary structTreeRoot = catalog.getAsDict(PdfName.STRUCTTREEROOT); PdfDictionary structParent = structTreeRoot.getAsDict(PdfName.PARENTTREE); PdfArray nums = structParent.getAsArray(PdfName.NUMS); PdfDictionary acroForm = catalog.getAsDict(PdfName.ACROFORM); PdfDictionary fonts = acroForm.getAsDict(PdfName.DR).getAsDict(PdfName.FONT); Assert.assertEquals(new PdfName("Helvetica"), fonts.getAsDict(new PdfName("Helv")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(new PdfName("ZapfDingbats"), fonts.getAsDict(new PdfName("ZaDb")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(new PdfName("ArialMT"), fonts.getAsDict(new PdfName("ArialMT")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(new PdfName("CourierStd"), fonts.getAsDict(new PdfName("CourierStd")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(1, nums.getAsNumber(2).intValue()); Assert.assertEquals(4, nums.getAsNumber(8).intValue()); Assert.assertEquals(nums.size(), 12); Assert.assertEquals(acroForm.getAsArray(PdfName.FIELDS).size(), 3); reader.close(); } @Test @Ignore public void copyTaggedPdf16() throws IOException, DocumentException { initializeDocument("16"); copy.setMergeFields(); PdfReader reader1 = new PdfReader(SOURCE63); PdfReader reader2 = new PdfReader(SOURCE64); copy.addDocument(reader1); copy.addDocument(reader2); document.close(); reader1.close(); reader2.close(); PdfReader reader = new PdfReader(output); PdfDictionary catalog = reader.getCatalog(); PdfDictionary acroForm = catalog.getAsDict(PdfName.ACROFORM); PdfDictionary fonts = acroForm.getAsDict(PdfName.DR).getAsDict(PdfName.FONT); Assert.assertEquals(new PdfName("Helvetica"), fonts.getAsDict(new PdfName("Helv")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(new PdfName("Courier"), fonts.getAsDict(new PdfName("Cour")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(new PdfName("Times-Bold"), fonts.getAsDict(new PdfName("TiBo")).getAsName(PdfName.BASEFONT)); Assert.assertEquals(new PdfName("ZapfDingbats"), fonts.getAsDict(new PdfName("ZaDb")).getAsName(PdfName.BASEFONT)); reader.close(); } @Test public void copyTaggedPdf17() throws IOException, DocumentException { initializeDocument("17"); PdfReader reader1 = new PdfReader(SOURCE10); PdfReader reader2 = new PdfReader(SOURCE19); copy.addPage(copy.getImportedPage(reader1, 1, true)); copy.addPage(copy.getImportedPage(reader2, 1, false)); document.close(); reader1.close(); reader2.close(); PdfReader reader = new PdfReader(output); Assert.assertEquals(2, reader.getNumberOfPages()); Assert.assertNotNull(reader.getPageN(1)); Assert.assertNotNull(reader.getPageN(2)); reader.close(); } @Test public void copyTaggedPdf18() throws IOException, DocumentException { initializeDocument("18"); copy.setMergeFields(); PdfReader reader1 = new PdfReader(SOURCE10); PdfReader reader2 = new PdfReader(SOURCE19); copy.addDocument(reader1); copy.addDocument(reader2); document.close(); PdfReader reader = new PdfReader(output); Assert.assertEquals(reader1.getNumberOfPages() + reader2.getNumberOfPages(), reader.getNumberOfPages()); for (int i = 1; i <= reader.getNumberOfPages(); i++) { Assert.assertNotNull(reader.getPageN(i)); } reader.close(); reader1.close(); reader2.close(); } @Test public void copyTaggedPdf19() throws IOException, DocumentException { initializeDocument("19"); PdfReader reader = new PdfReader(SOURCE18); copy.addPage(copy.getImportedPage(reader, 1, true)); document.close(); reader.close(); reader = new PdfReader(output); PdfDictionary page1 = reader.getPageN(1); PdfDictionary t1_0 = page1.getAsDict(PdfName.RESOURCES).getAsDict(PdfName.XOBJECT).getAsStream(new PdfName("Fm0")).getAsDict(PdfName.RESOURCES).getAsDict(PdfName.FONT).getAsDict(new PdfName("T1_0")); Assert.assertNotNull(t1_0); reader.close(); } @Test public void copyTaggedPdf20() throws IOException, DocumentException, ParserConfigurationException, SAXException { initializeDocument("20"); copy.setMergeFields(); PdfReader reader2 = new PdfReader(SOURCE72); copy.addDocument(reader2, java.util.Arrays.asList(1,3,5)); document.close(); reader2.close(); PdfReader reader = new PdfReader(output); PdfDictionary catalog = reader.getCatalog(); PdfDictionary acroForm = catalog.getAsDict(PdfName.ACROFORM); PdfArray acroFields = acroForm.getAsArray(PdfName.FIELDS); junit.framework.Assert.assertTrue(acroFields.size() == 4); reader.close(); compareResults("20"); } @Test public void copyTaggedPdf21() throws IOException, DocumentException { initializeDocument("21"); copy.setMergeFields(); PdfReader reader1 = new PdfReader(SOURCE73); copy.addDocument(reader1); document.close(); reader1.close(); PdfReader reader = new PdfReader(output); PdfDictionary page = reader.getPageN(1); PdfDictionary resources = page.getAsDict(PdfName.RESOURCES); PdfDictionary xObject = resources.getAsDict(PdfName.XOBJECT); PdfStream img = xObject.getAsStream(new PdfName("Im0")); PdfArray decodeParms = img.getAsArray(PdfName.DECODEPARMS); junit.framework.Assert.assertEquals(2, decodeParms.size()); PdfObject iref = decodeParms.getPdfObject(0); junit.framework.Assert.assertTrue(iref instanceof PdfIndirectReference); junit.framework.Assert.assertTrue(reader.getPdfObjectRelease(((PdfIndirectReference)iref).getNumber()) instanceof PdfNull); reader.close(); } //Check for crash in case of structure element contains no "Pg" keys. @Test public void copyTaggedPdf22() throws IOException, DocumentException { initializeDocument("22"); PdfReader reader = new PdfReader(DEV_805); int n = reader.getNumberOfPages(); for (int page = 0; page < n; ) { copy.addPage(copy.getImportedPage(reader, ++page,true)); } copy.freeReader(reader); document.close(); reader.close(); } @Test public void copyTaggedPdf23() throws IOException, DocumentException { PdfReader reader = new PdfReader(SOURCE81); PdfDictionary structTreeRoot = verifyIsDictionary(reader.getCatalog().getDirectObject(PdfName.STRUCTTREEROOT), NO_STRUCT_TREE_ROOT); PdfDictionary idTree = verifyIsDictionary(PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.IDTREE)), NO_ID_TREE); Assert.assertTrue(EMPTY_ID_TREE, idTree.hashMap.size() > 0); } @Test(expected = BadPdfFormatException.class) public void copyTaggedPdf24() throws IOException, DocumentException { initializeDocument("24"); PdfReader reader1 = new PdfReader(SOURCE24); copy.addPage(copy.getImportedPage(reader1, 17, true)); document.close(); reader1.close(); } @Test(timeout = 60000) public void copyTaggedPdf25() throws IOException, DocumentException, InterruptedException { initializeDocument("25"); PdfReader reader = new PdfReader(SOURCE25); PdfReader reader1 = new PdfReader(SOURCE25_1); copy.addDocument(reader); copy.freeReader(reader); copy.addDocument(reader1); copy.freeReader(reader1); copy.getStructureTreeRoot(); copy.close(); document.close(); reader.close(); reader1.close(); CompareTool compareTool = new CompareTool(); String errorMessage = compareTool.compareByContent(output, CMP25, OUTPATH, "diff_"); if (errorMessage != null) { Assert.fail(errorMessage); } } @Test public void copyFields1Test() throws DocumentException, IOException, InterruptedException, ParserConfigurationException, SAXException { initializeDocument("CopyFields1"); copy.setMergeFields(); PdfReader readerMain = new PdfReader(SOURCE_CF_14); PdfReader secondSourceReader = new PdfReader(SOURCE_CF_15); //PdfReader thirdReader = new PdfReader("./src/test/resources/com/itextpdf/text/pdf/PdfCopyTest/appearances1.pdf"); copy.addDocument(readerMain); copy.copyDocumentFields(secondSourceReader); //copy.addDocument(thirdReader); copy.close(); readerMain.close(); secondSourceReader.close(); //thirdReader.close(); /*CompareTool compareTool = new CompareTool("./target/com/itextpdf/test/pdf/PdfCopyTest/copyFields.pdf", "./src/test/resources/com/itextpdf/text/pdf/PdfCopyTest/cmp_copyFields.pdf"); String errorMessage = compareTool.compareByContent("./target/com/itextpdf/test/pdf/PdfCopyTest/", "diff"); if (errorMessage != null) { junit.framework.Assert.fail(errorMessage); }*/ compareResults("CopyFields1"); } @Test public void copyFields2Test() throws DocumentException, IOException, InterruptedException, ParserConfigurationException, SAXException { initializeDocument("CopyFields2"); copy.setMergeFields(); PdfReader readerMain = new PdfReader(SOURCE_CF_11); PdfReader secondSourceReader = new PdfReader(SOURCE_CF_14); copy.addDocument(readerMain); copy.copyDocumentFields(secondSourceReader); copy.close(); readerMain.close(); secondSourceReader.close(); compareResults("CopyFields2"); } @Test public void copyFields3Test() throws DocumentException, IOException, InterruptedException, ParserConfigurationException, SAXException { initializeDocument("CopyFields3"); copy.setMergeFields(); PdfReader readerMain = new PdfReader(SOURCE_CF_12); PdfReader secondSourceReader = new PdfReader(SOURCE_CF_11); copy.addDocument(readerMain); copy.copyDocumentFields(secondSourceReader); copy.close(); readerMain.close(); secondSourceReader.close(); compareResults("CopyFields3"); } @Test public void copyFields4Test() throws Exception { initializeDocument("CopyFields4"); copy.setMergeFields(); PdfReader readerMain = new PdfReader(SOURCE_CF_13); PdfReader secondSourceReader = new PdfReader(SOURCE_CF_16); copy.addDocument(readerMain); copy.copyDocumentFields(secondSourceReader); copy.close(); readerMain.close(); secondSourceReader.close(); compareResults("CopyFields4"); } @After public void finalize() { Document.compress = true; } private PdfArray verifyArraySize(PdfObject obj, Integer size, String message) { return verifyArraySize(obj, size, message, false); } private PdfArray verifyArraySize(PdfObject obj, Integer size, String message, boolean ignoreIfNotArray) { if (!(obj instanceof PdfArray)) { if (ignoreIfNotArray) return null; else Assert.fail(message + " is not array"); } if (((PdfArray)obj).size() != size) Assert.fail(message+" has wrong size"); return (PdfArray)obj; } private PdfDictionary verifyIsDictionary(PdfObject obj, String message) { if (obj == null || !obj.isDictionary()) Assert.fail(message); return (PdfDictionary)obj; } private void compareResults(String name) throws IOException, ParserConfigurationException, SAXException { PdfReader reader = new PdfReader("./target/com/itextpdf/test/pdf/TaggedPdfCopyTest/out"+ name +".pdf"); FileOutputStream xmlOut = new FileOutputStream("./target/com/itextpdf/test/pdf/TaggedPdfCopyTest/test"+ name +".xml"); new MyTaggedPdfReaderTool().convertToXml(reader, xmlOut); xmlOut.close(); Assert.assertTrue(compareXmls("./src/test/resources/com/itextpdf/text/pdf/TaggedPdfCopyTest/xml/test"+ name +".xml", "./target/com/itextpdf/test/pdf/TaggedPdfCopyTest/test"+ name +".xml")); } private boolean compareXmls(String xml1, String xml2) throws ParserConfigurationException, SAXException, IOException { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); dbf.setCoalescing(true); dbf.setIgnoringElementContentWhitespace(true); dbf.setIgnoringComments(true); DocumentBuilder db = dbf.newDocumentBuilder(); org.w3c.dom.Document doc1 = db.parse(new File(xml1)); doc1.normalizeDocument(); org.w3c.dom.Document doc2 = db.parse(new File(xml2)); doc2.normalizeDocument(); return doc2.isEqualNode(doc1); } private int getCommonNumsCount(String filename) throws IOException { PdfReader reader = new PdfReader(filename); PdfDictionary structTreeRoot = reader.getCatalog().getAsDict(PdfName.STRUCTTREEROOT); PdfArray kids = ((PdfDictionary)PdfStructTreeController.getDirectObject(structTreeRoot.get(PdfName.PARENTTREE))).getAsArray(PdfName.KIDS); int cnt = 0; for (int i = 0; i < kids.size(); i++) { PdfArray nums = kids.getAsDict(i).getAsArray(PdfName.NUMS); cnt += nums.size(); } reader.close(); return cnt; } static class MyTaggedPdfReaderTool extends TaggedPdfReaderTool { @Override public void convertToXml(PdfReader reader, OutputStream os, String charset) throws IOException { this.reader = reader; OutputStreamWriter outs = new OutputStreamWriter(os, charset); out = new PrintWriter(outs); out.write("<root>"); // get the StructTreeRoot from the root object PdfDictionary catalog = reader.getCatalog(); PdfDictionary struct = catalog.getAsDict(PdfName.STRUCTTREEROOT); if (struct == null) throw new IOException(MessageLocalization.getComposedMessage("no.structtreeroot.found")); // Inspect the child or children of the StructTreeRoot inspectChild(struct.getDirectObject(PdfName.K)); out.write("</root>"); out.flush(); out.close(); } } }